from com.zjs.crawer.urlcontent.baseurlcontent import BaseUrlContent
from com.zjs.zjsqueue import zurlpathqueue
from bs4 import BeautifulSoup as BS
from com.zjs.util.download import request
from com.zjs.zjsqueue import zurlcontentqueue
from queue import Empty
import logging

class ZhiLianPath(BaseUrlContent):
    """Crawler worker for the "zhilian" job site.

    Pulls listing-page URLs from the shared path queue, fetches each page,
    extracts the job-posting links (the <a> inside each <td class="zwmc">
    cell) and pushes their hrefs onto the content queue under this
    worker's name.
    """

    # Identifier used as the queue key and in log messages.
    name = "zhilian"

    def __init__(self):
        # Lazy %-style args avoid building the message when DEBUG is off.
        logging.debug("[path][%s]:start!", self.name)

    def run(self):
        """Consume URLs from the path queue forever, delegating to dojob().

        Errors are logged and the loop continues; the worker never exits.
        """
        while True:
            # BUG FIX: initialize url before the try so the except handler
            # can log it even when zurlpathqueue.get() itself raises
            # (previously that path hit a NameError on `url`).
            url = None
            try:
                url = zurlpathqueue.get(self.name)
                self.dojob(url)
            except Exception as ex:
                logging.error("[path][%s]:%s解析失败", self.name, url)
                logging.error(ex)

    def dojob(self, url_b):
        """Fetch the listing page at url_b and enqueue each posting link.

        Parameters
        ----------
        url_b : str
            URL of a zhilian listing page (B-level page).
        """
        html = request.get(url_b, 3)
        soup = BS(html.content, "lxml")
        # Each job row exposes its title link as the first <a> inside a
        # <td class="zwmc"> cell.
        try:
            for cell in soup.find_all("td", attrs={"class": "zwmc"}):
                anchor = cell.find("a")
                # Guard: a missing <a> or href previously raised a
                # TypeError that the old bare `except: pass` hid — and
                # aborted extraction of all remaining rows.
                if anchor is not None and anchor.has_attr("href"):
                    zurlcontentqueue.put(self.name, anchor["href"])
        except Exception as ex:
            # Was a bare `except: pass`; log so parse failures are visible.
            logging.error("[path][%s]:extract failed for %s", self.name, url_b)
            logging.error(ex)
